In [1]:
import altair as alt
import pandas as pd
import geopandas as gpd
In [2]:
# Turn off Altair's max-rows check for datasets passed to charts below.
# The call's return value is this cell's displayed output.
alt.data_transformers.disable_max_rows()
Out[2]:
DataTransformerRegistry.enable('default')
In [3]:
# Load the raw population table (path is relative to the notebook's working directory).
population_csv = 'data/population_prospects_all_countries.csv'
data = pd.read_csv(population_csv)
In [4]:
# Remove the 2020 rows of the 'Medium variant' series
# (presumably because 2020 is also present under 'Estimates' -- TODO confirm).
is_2020_medium = (data['year'] == 2020) & (data['variant'] == 'Medium variant')
data = data[~is_2020_medium].reset_index(drop=True)

# Scale population by 1000 to work with exact head counts
# (source figures are presumably in thousands -- verify against the dataset docs).
# NOTE: re-running this cell scales the column again; re-load `data` first.
data['population'] = data['population'] * 1000
data.head()
Out[4]:
variant country country_code parent_code year population
0 Estimates Burundi 108 910 1950 2308927.0
1 Estimates Comoros 174 910 1950 159459.0
2 Estimates Djibouti 262 910 1950 62000.0
3 Estimates Eritrea 232 910 1950 822347.0
4 Estimates Ethiopia 231 910 1950 18128030.0
In [5]:
# Capitalised column names double as axis / tooltip labels in the charts below.
display_names = {'year': 'Year', 'population': 'Population', 'country': 'Country'}
data = data.rename(columns=display_names)

# Relabel the projection series; this label must match the strokeDash scale domain
# used by the line chart.
data['variant'] = data['variant'].str.replace(
    'Medium variant',
    'Prediction (medium variant)',
)
In [6]:
# Link table used to join the map with the population data:
# ISO 3166-1 numeric code (renamed to `country_code`) <-> ISO 3166-1 alpha-3 code
# (renamed to `id`).
# NOTE(review): country-codes.csv must already exist in the working directory; the
# `wget` cell that fetches it appears further down in this notebook.
country_codes = pd.read_csv('country-codes.csv', usecols=['ISO3166-1-numeric', 'ISO3166-1-Alpha-3'])
country_codes = country_codes.rename(columns={'ISO3166-1-numeric': 'country_code', 'ISO3166-1-Alpha-3': 'id'})
country_codes.head()
Out[6]:
id country_code
0 TWN 158.0
1 AFG 4.0
2 ALB 8.0
3 DZA 12.0
4 ASM 16.0
In [7]:
# Attach the alpha-3 `id` to every population row; the left join keeps rows whose
# country_code has no match (their `id` becomes NaN).
# NOTE: re-running this cell on the already-merged frame adds suffixed id columns.
data = data.merge(country_codes, on='country_code', how='left')
In [8]:
# Wide table with one row per country and one column per endpoint year.
endpoint_years = data['Year'].isin([2020, 2100])
pop_growth_percent = (
    data[endpoint_years]
    .pivot_table(index=['Country', 'country_code', 'id'], columns=['Year'], values='Population')
    .reset_index()
    .rename(columns={2020: '2020', 2100: '2100'})  # altair requires colnames to be strings
)

# Relative change from 2020 to 2100, stored as a fraction (0.93 == +93%);
# the charts format it as a percentage.
baseline_2020 = pop_growth_percent['2020']
pop_growth_percent['Change'] = (pop_growth_percent['2100'] - baseline_2020) / baseline_2020
pop_growth_percent.head()
Out[8]:
Year Country country_code id 2020 2100 Change
0 Afghanistan 4 AFG 38928341.0 74937961.0 0.925023
1 Albania 8 ALB 2877800.0 1088338.0 -0.621816
2 Algeria 12 DZA 43851043.0 70704619.0 0.612382
3 American Samoa 16 ASM 55197.0 36156.0 -0.344964
4 Andorra 20 AND 77265.0 62406.0 -0.192312
In [9]:
! wget https://raw.githubusercontent.com/datasets/country-codes/master/data/country-codes.csv
--2022-04-24 23:40:37--  https://raw.githubusercontent.com/datasets/country-codes/master/data/country-codes.csv
SSL_INIT
Loaded CA certificate '/etc/ssl/certs/ca-certificates.crt'
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.108.133, 185.199.109.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 129955 (127K) [text/plain]
Saving to: ‘country-codes.csv.2’

country-codes.csv.2 100%[===================>] 126,91K  --.-KB/s    in 0,07s   

2022-04-24 23:40:37 (1,69 MB/s) - ‘country-codes.csv.2’ saved [129955/129955]

In [9]:
# Country geometries for the map; `name` is renamed so it lines up with the
# `Country` column used in the population data.
world = (
    gpd.read_file('../02/practice/data/world-countries.json')
    .rename(columns={'name': 'Country'})
)
In [10]:
# A few manual fixes to fill in mismatches between the map ids and the alpha-3 codes.
# Numeric ISO codes noted in the original: Western Sahara = 732, Somaliland (mapped to
# Somalia's code) = 706.
id_fixes = {
    'Western Sahara': 'ESH',
    'Somaliland': 'SOM',
    'South Sudan': 'SSD',
}
for country_name, alpha3 in id_fixes.items():
    world.loc[world['Country'] == country_name, 'id'] = alpha3
In [11]:
# Join the growth figures onto the geometries by alpha-3 `id`. The population table's
# `Country` column is dropped so the map's own `Country` names are kept; the left join
# keeps shapes that have no population match.
growth_by_id = pop_growth_percent.drop(columns=['Country'])
world_pop_change = world.merge(growth_by_id, on='id', how='left')
world_pop_change.head()
Out[11]:
id Country geometry country_code 2020 2100 Change
0 AFG Afghanistan POLYGON ((61.21082 35.65007, 62.23065 35.27066... 4.0 38928341.0 74937961.0 0.925023
1 AGO Angola MULTIPOLYGON (((16.32653 -5.87747, 16.57318 -6... 24.0 32866268.0 188283132.0 4.728765
2 ALB Albania POLYGON ((20.59025 41.85540, 20.46317 41.51509... 8.0 2877800.0 1088338.0 -0.621816
3 ARE United Arab Emirates POLYGON ((51.57952 24.24550, 51.75744 24.29407... 784.0 9890400.0 12909869.0 0.305293
4 ARG Argentina MULTIPOLYGON (((-65.50000 -55.20000, -66.45000... 32.0 45195777.0 56802493.0 0.256810
In [12]:
# Single-item selection keyed on the `id` field and shared by both charts below.
# empty='all': while nothing is selected, every mark counts as selected.
selector = alt.selection_single(
    name='id',
    fields=['id'],
    empty='all',
)
In [70]:
# Horizontal gradient legend, positioned manually (orient='none' + legendX/legendY).
change_legend = alt.Legend(
    title=['Change in population'],
    titleAlign='center',
    titleAnchor='middle',
    orient='none',
    legendX=490,
    legendY=10,
    direction='horizontal',
    gradientLength=150.0,
    format='%',
)

# The domain is symmetric around zero so "no change" sits at the middle of the colour scheme.
# NOTE(review): 5.81 is presumably the largest absolute `Change` in the data -- confirm.
change_color = alt.Color(
    'Change:Q',
    scale=alt.Scale(scheme='purplegreen', domain=[-5.81, 5.81]),
    legend=change_legend,
)

# World map coloured by relative population change; marks outside the current
# selection are dimmed to 30% opacity.
choropleth = (
    alt.Chart(world_pop_change)
    .project(type='mercator')
    .mark_geoshape(stroke='#C2C0C0', strokeWidth=1)
    .encode(
        tooltip=[
            alt.Tooltip('Country:N'),
            alt.Tooltip('Change', format='.1%'),
        ],
        color=change_color,
        opacity=alt.condition(selector, alt.value(1.0), alt.value(0.3)),
    )
    .add_selection(selector)
    .properties(
        title=alt.TitleParams(
            text='Population changes by country, 2020-2100',
            subtitle=['predicted proportional to 2020 estimates'],
        )
    )
)
In [71]:
# Solid stroke for historical estimates, dashed stroke for the projection.
# The legend is positioned manually (orient='none' + legendX/legendY).
variant_dash = alt.StrokeDash(
    field='variant',
    scale=alt.Scale(
        domain=['Estimates', 'Prediction (medium variant)'],
        range=[[1, 0], [3, 3]],
    ),
    legend=alt.Legend(
        title='Variant',
        orient='none',
        legendX=710,
        legendY=10,
    ),
)

# One line per country (via `detail`); lines outside the current selection are hidden
# completely (opacity 0).
linechart = (
    alt.Chart(data)
    .mark_line()
    .encode(
        x=alt.X(
            'Year:Q',
            axis=alt.Axis(format='04d'),
            scale=alt.Scale(domain=[1950, 2100]),
        ),
        y=alt.Y(
            'Population:Q',
            scale=alt.Scale(domain=[0, 1e9+7e8]),  # fixed y range: 0 to 1.7 billion
            axis=alt.Axis(format='s'),
            title='Population',
        ),
        detail='Country:N',
        color=alt.value('#14652E'),
        strokeDash=variant_dash,
        tooltip=alt.Tooltip(['Country:N', 'Population:Q', 'Year:Q']),
        opacity=alt.condition(selector, alt.value(1.0), alt.value(0.0)),
    )
    .add_selection(selector)
    .properties(
        title=alt.TitleParams(
            text=['Population changes by country, 1950-2100'],
            subtitle=['historical data and predictions, absolute figures'],
        )
    )
)
In [85]:
# Final view: map on the left, line chart on the right, with usage instructions shown
# as a left-aligned caption underneath (the concat "title" is placed at the bottom).
alt.hconcat(
    choropleth.properties(width=650, height=550),
    linechart.properties(width=300, height=550),
    title=alt.TitleParams(
        text=['Hover mouse cursor over a country to see its name and exact figures',
              'Click on a country on the map to highlight it and select it in the line chart to the right'],
        baseline='bottom',
        orient='bottom',
        anchor='start',
        fontWeight='normal',
        fontSize=12,
    ),
).configure_concat(spacing=0)
Out[85]: